In [2]:
%matplotlib qt4
from __future__ import division

import math

from models import tools, optimize, models, filters
from models.tests import PerformanceTest

import numpy as np
import pandas as pd
import sklearn as sk
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from cycler import cycler

sns.set_style("ticks", {"legend.frameon": True})
mpl.rcParams['text.usetex'] = False
mpl.rcParams['text.latex.unicode'] = False
mpl.rcParams['axes.prop_cycle'] = cycler('color', ['#02A5F4', 'orange', 'green'])

In [3]:
data = tools.load_data(limit=500000, offset=1000000)
data = data[filters.sequentize(data)]


Loaded 469545 answers.

In [4]:
len(data)


Out[4]:
91242

Calibration


In [4]:
def produce_logger(SuperClass):
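    """Return a subclass of SuperClass whose update() also records, for every
    repeated item, a (time since previous attempt, observed correctness,
    prediction) triple in self.timing."""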

    class Logger(SuperClass):
    
        def __init__(self, *args, **kwargs):
            super(Logger, self).__init__(*args, **kwargs)
            self.timing = []

        def update(self, answer):
            item = self.items[answer.user_id, answer.place_id]
            if item.practices:
                diff = tools.time_diff(answer.inserted, item.last_inserted)
                self.timing += [(diff, answer.is_correct, self.predict(answer))]
            super(Logger, self).update(answer)
    
    return Logger

LogPFA = produce_logger(models.PFAModel)
LogPFAExt = produce_logger(models.PFAExt)
LogPFAGong = produce_logger(models.PFAGong)
LogPFAExtTiming = produce_logger(models.PFAExtTiming)
LogPFAExtSpacing = produce_logger(models.PFAExtSpacing)
LogPFAGongTiming = produce_logger(models.PFAGongTiming)
LogPFATiming = produce_logger(models.PFATiming)
LogPFAExtStaircase = produce_logger(models.PFAExtStaircase)

In [6]:
def time_effect_log(t, a=1.8, c=0.123):
    return a - c*np.log(t)

def time_effect_div(t, a=2, c=0.2):
    return a / (t+1)**c

def time_effect_exp(t, a=1.6, c=0.01):
    return a * np.exp(-c * np.sqrt(t))
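
For reference, the three candidate time-effect shapes above, written as functions of the time t (in seconds) since the previous attempt of an item:

    time_effect_log: f(t) = a - c * ln(t)
    time_effect_div: f(t) = a / (t + 1)^c
    time_effect_exp: f(t) = a * exp(-c * sqrt(t))

The default constants are only starting points; the cells below try other values.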

In [5]:
def chunks(l, n):
    for i in xrange(0, len(l), n):
        yield l[i:i+n]

def interval_error(timings, interval_size=500, metric=tools.rmse):

    answers = sorted(timings, key=lambda p: p[0])

    def get_diffs_mean(chunk):
        return np.mean([diff for diff, _, _ in chunk])
    def get_answers_mean(chunk):
        return np.mean([pred - obs for _, obs, pred in chunk])
    def get_answers_value(chunk):
        predictions = [pred for _, obs, pred in chunk if np.isfinite(pred)]
        observation = [obs for _, obs, pred in chunk if np.isfinite(pred)]
        return metric(observation, predictions)

    return [
        (get_diffs_mean(chunk), get_answers_value(chunk))
        for chunk in chunks(answers, interval_size)
    ]
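
Each element returned by interval_error pairs the mean time gap of a chunk with the metric value computed over that chunk's (observation, prediction) pairs. A quick usage sketch (hypothetical logged model m, illustrative numbers only):

    intervals = interval_error(m.timing, interval_size=500, metric=tools.rmse)
    # -> [(12.3, 0.41), (75.8, 0.39), ...]   # (mean time gap in seconds, metric value)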

PFA/E, PFA/E/T, PFA/G, PFA/G/T


In [8]:
pfa = LogPFA(models.EloModel(), gamma=2, delta=-1)
pfa.train(data)

In [9]:
pfae = LogPFAExt(models.EloModel())
pfae.train(data)

In [165]:
def time_effect_div(t, a=2.5, c=0.17):
    return a / (t+1)**c

pfaet = LogPFAExtTiming(models.EloModel(), time_effect_fun=time_effect_log)
pfaet.train(data)

In [7]:
pfag = LogPFAGong(models.EloModel(), decay=0.349, gamma=2.040, delta=-0.11)
pfag.train(data)

In [167]:
def time_effect_div(t, a=1.2, c=0.15):
    return a / (t+1)**c

pfagt = LogPFAGongTiming(models.EloModel(), time_effect_fun=time_effect_div)
pfagt.train(data)

In [8]:
pfaest = LogPFAExtStaircase(models.EloModel(),
    gamma=2.2, delta=-0.9,
    staircase={
        (0, 60): 1.43,
        (60, 90): 1.17,
        (90, 150): 1.01,
        (150, 300): 0.93,
        (300, 600): 0.82,
        (600, 60*30): 0.78,
        (60*30, 60*60*3): 0.76,
        (60*60*3, 60*60*24): 0.63,
        (60*60*24, 60*60*24*5): 0.42,
        (60*60*24*5, np.inf): 0.12,
    }
)
pfaest.train(data)
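
The staircase above assigns a constant time-effect value to each elapsed-time bucket (bounds in seconds). A minimal sketch of how such a lookup could work, assuming lower-inclusive, upper-exclusive buckets (this is not the models.PFAExtStaircase implementation):

    def staircase_time_effect(staircase, t):
        # return the value of the bucket whose interval contains the elapsed time t
        for (lower, upper), value in staircase.items():
            if lower <= t < upper:
                return value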

PFA/E/Ts


In [8]:
def time_effect_log(t, a=1.8, c=0.123):
    return a - c*np.log(t)

pfaet1 = LogPFAExtTiming(models.EloModel(), time_effect_fun=time_effect_log)
pfaet1.train(data)

In [9]:
def time_effect_exp(t, a=1.6, c=0.01):
    return a * np.exp(-c * np.sqrt(t))

pfaet2 = LogPFAExtTiming(models.EloModel(), time_effect_fun=time_effect_exp)
pfaet2.train(data)

In [10]:
def time_effect_div(t, a=2.608, c=0.241):
    return a / (t+1)**c

pfaet3 = LogPFAExtTiming(models.EloModel(), gamma=2.293, delta=-0.664,
                         time_effect_fun=time_effect_div)
pfaet3.train(data)

PFA/G/Ts


In [11]:
def time_effect_log(t, a=0.8, c=0.05):
    return a - c*np.log(t)

pfagt1 = LogPFAGongTiming(models.EloModel(), time_effect_fun=time_effect_log)
pfagt1.train(data)

In [12]:
def time_effect_exp(t, a=0.5, c=0.002):
    return a * np.exp(-c * np.sqrt(t))

pfagt2 = LogPFAGongTiming(models.EloModel(), time_effect_fun=time_effect_exp)
pfagt2.train(data)

In [7]:
def time_effect_div(t, a=1.2, c=0.15):
    return a / (t+1)**c

pfagt3 = LogPFAGongTiming(models.EloModel(), time_effect_fun=time_effect_div)
pfagt3.train(data)

In [6]:
pfagt4 = LogPFATiming(models.EloModel(),
                      time_effect_good='pow', time_effect_bad='pow',
                      **{'a': 3.1384736895278618, 'c': 0.19758248174437759,
                         'b': 5.0679683848943906, 'd': 0.76393889411131488})
pfagt4.train(data)

In [10]:
m1 = pfagt3
m2 = pfagt4
m3 = pfaest

n1 = m1.ABBR + ' pow'
n2 = m2.ABBR + ' pow'
n3 = m3.ABBR

metric = lambda y_true, y_pred: (np.mean(y_true) - np.mean(y_pred))

intervals1 = interval_error(m1.timing, interval_size=12000, metric=metric)
intervals2 = interval_error(m2.timing, interval_size=12000, metric=metric)
intervals3 = interval_error(m3.timing, interval_size=12000, metric=metric)

plt.figure(num=None, figsize=(5, 4), dpi=120)
plt.axhline(0, color='#888888', linestyle='--')

p1 = plt.plot([x[0] for x in intervals1], [x[1] for x in intervals1], 'o-')
p2 = plt.plot([x[0] for x in intervals2], [x[1] for x in intervals2], 'o-')
p3 = plt.plot([x[0] for x in intervals3], [x[1] for x in intervals3], 'o-')

plt.xscale('log')
plt.ylabel('Correctness - Prediction')
plt.xlabel('Time from previous attempt in seconds')
plt.xlim([min([x[0] for x in intervals1]), max([x[0] for x in intervals1])])
plt.ylim([-0.1, 0.1])

legend = plt.legend([p1[0], p2[0], p3[0]], (n1, n2, n3), loc='upper right', prop={'size': 12})
legend.get_frame().set_linewidth(1)

plt.tight_layout()
plt.show()

Multiple Runs


In [26]:
plots = []
params = [(x, y) for x in [1.1, 1.2, 1.3] for y in [0.08, 0.09]]
intervals = []
metric = lambda y_true, y_pred: (np.mean(y_pred) - np.mean(y_true))

time_effect_template = lambda a, b: (lambda t: a - b*np.log(t))
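# `params` is the Cartesian product of the candidate a and b values (6 combinations);
# each run trains a separate PFA/G/T model with the time effect a - b*log(t).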

for time_effect in [time_effect_template(*args) for args in params]:
    pfagt = LogPFAGongTiming(models.EloModel(), time_effect_fun=time_effect)
    pfagt.train(data)
    intervals += [interval_error(pfagt.timing, interval_size=1000, metric=metric)]
    print len(intervals), 'done'

for interval in intervals:
    plots += [plt.plot([x[0] for x in interval], [x[1] for x in interval], '.-')]

plt.xscale('log')
plt.ylabel('predicted - observed')
plt.xlabel('time from previous attempt (seconds)')
plt.xlim([min([x[0] for x in intervals[0]]) - 20, max([x[0] for x in intervals[0]]) + 100000])
plt.legend([p[0] for p in plots], map(lambda x: 'a={},b={}'.format(*x), params))
plt.show()


1 done
2 done
3 done
4 done
5 done
6 done

Different Approach


In [10]:
data = tools.add_spacing(data)

In [12]:
ranges = [0, 60, 90, 150, 300, 600, 1800, 10800, 86400, 259200, 2592000]
intervals = {i: None for i in zip(ranges, ranges[1:] + [np.inf])}
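# The comprehension pairs consecutive boundaries into buckets of spacing values:
# (0, 60), (60, 90), ..., (259200, 2592000), (2592000, inf), all in seconds.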

for interval in intervals.keys():
    lower, upper = interval
    data_slice = data[(data['spacing'] > lower) & (data['spacing'] < upper)].copy()
    
    if data_slice.empty:
        continue
        
    print interval, len(data_slice)
    
    pfaet = models.PFAExtTiming(models.EloModel(), time_effect_fun=lambda t: t/80)
    pfaet_test = PerformanceTest(pfaet, data_slice)
    pfaet_test.run()
    
    intervals[interval] = pfaet_test.results['train'].off
    
intervals = sorted([(np.mean(interval), value) for interval, value in intervals.items()], key=lambda x: x[0])


(600, 1800) 5403
(150, 300) 12653
(90, 150) 9562
(86400, 259200) 2470
(300, 600) 6758
(259200, 2592000) 355
(10800, 86400) 4430
(60, 90) 6124
(1800, 10800) 4449
(0, 60) 4970

In [19]:
ranges = [0, 60, 90, 150, 300, 600, 1800, 10800, 86400, 259200, 2592000]
intervals = {i: None for i in zip(ranges, ranges[1:] + [np.inf])}

for interval in intervals.keys():
    lower, upper = interval
    data_slice = data[(data['spacing'] > lower) & (data['spacing'] <= upper)]
    
    if len(data_slice) > 0:
        correct = len(data_slice[data_slice['is_correct'] == 1]) / len(data_slice)
        intervals[interval] = correct
        
intervals.pop((2592000, np.inf))

In [20]:
intervals = sorted([(np.mean(interval), value) for interval, value in intervals.items()], key=lambda x: x[0])
plt.plot([x[0] for x in intervals], [x[1] for x in intervals])
plt.xscale('log')
plt.show()

In [18]:
ind = np.arange(len(intervals))    # the x locations for the groups
width = 0.50       # the width of the bars: can also be len(x) sequence

correctness = [intervals[i] * 100 for i in sorted(intervals)]
incorrectness = [(1 - intervals[i]) * 100 for i in sorted(intervals)]

p1 = plt.bar(ind, correctness, width, color='#7FFF24')
p2 = plt.bar(ind, incorrectness, width, color='#ff512e', bottom=correctness)

plt.ylabel('%')
plt.xticks(ind+width/2., ('60 s', '90 s', '150 s', '5 m', '10 m',
                          '30 m', '3 h', '24 h', '3 d', '30 d'))
plt.yticks(np.arange(0, 101, 10))
plt.legend((p1[0], p2[0]), ('correct', 'incorrect'), loc=4)

plt.show()

In [6]:
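# Group the answers by (user_id, place_id) so that consecutive attempts on the
# same item (and the time gaps between them) can be paired up below.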
items = {}

for _, row in data.iterrows():
    index = (row.user_id, row.place_id)
    answer = models.Answer(**row.to_dict())
    if index in items:
        items[index].append(answer)
    else:
        items[index] = [answer]

In [7]:
ranges = [0, 60, 90, 150, 300, 600, 1800, 10800, 86400, 259200, 2592000]
intervals = zip(ranges, ranges[1:] + [np.inf])

def get_interval(value, list_of_intervals):
    for lower, upper in list_of_intervals:
        if lower < value <= upper:
            return lower, upper

correct_before = {i: [] for i in intervals}
incorrect_before = {i: [] for i in intervals}

for index in items:
    answers = sorted(items[index], key=lambda x: x.inserted)
    for a1, a2 in zip(answers, answers[1:]):
        diff = tools.time_diff(a2.inserted, a1.inserted)
        interval = get_interval(diff, intervals)
        if interval is None:
            continue
        if a1.is_correct:
            correct_before[interval].append(a2.is_correct)
        else:
            incorrect_before[interval].append(a2.is_correct)

In [9]:
correct_intervals = {i: np.mean(v) for i, v in correct_before.items()}
incorrect_intervals = {i: np.mean(v) for i, v in incorrect_before.items()}

In [17]:
ind = (np.arange(len(intervals) - 1) - 1.2) * 1.15   # the x locations for the groups
width = 0.4       # the width of the bars: can also be len(x) sequence

correctness = [correct_intervals[i] * 100 for i in sorted(intervals) if i != (2592000, np.inf)]
incorrectness = [incorrect_intervals[i] * 100 for i in sorted(intervals) if i != (2592000, np.inf)]

p1 = plt.bar(ind-0.25, correctness, width, color='#7FFF24')
p2 = plt.bar(ind+0.25, incorrectness, width, color='#ff512e')

plt.ylabel('%')
plt.xticks(ind+width/2., ('60 s', '90 s', '150 s', '5 m', '10 m',
                          '30 m', '3 h', '24 h', '3 d', '30 d'))
plt.yticks(np.arange(0, 101, 10))
plt.legend((p2[0], p1[0]), ('incorrect before', 'correct before'), loc=4)

plt.show()
